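# mTurk maker: builds the relevance and frames training-data CSVs from the
# research assistants' annotations (RA copies and mTurk copies).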
################################################################################
# Dependencies
################################################################################
library(data.table)
library(purrr)
library(lubridate)
library(readr)
library(dplyr)
library(readr)
library(readxl)
library(irr)
library(tidyr)
################################################################################
# Setup
################################################################################
# NOTE(review): rm(list = ls()) wipes the caller's global environment when the
# script is source()d from an interactive session. It is kept only to preserve
# the script's existing behaviour; consider removing it.
rm(list = ls())

# - set dir
# Locate this script on disk and make its folder the working directory, so the
# relative paths used below resolve the same way wherever it is launched from.
args <- commandArgs()

# When a "--file=<path>" argument is present it is taken as the script location.
scriptName <- args[startsWith(args, "--file=")]

if (length(scriptName) == 0) {
  # No "--file=" argument: fall back to the file open in the RStudio editor.
  scriptName <- rstudioapi::getSourceEditorContext()$path
} else {
  # Drop the "--file=" prefix; only the first such argument is considered.
  scriptName <- sub("^--file=", "", scriptName[1])
}

if (length(scriptName) != 1 || is.na(scriptName) || !nzchar(scriptName)) {
  stop("Could not determine the location of this script.", call. = FALSE)
}

# dirname() returns "." for a bare file name (e.g. `Rscript script.R`), where
# the previous regex-based split produced NA and made setwd() fail.
pathName <- dirname(scriptName)

setwd(pathName)

# The whole file is read as a single string; it is used below as the regex
# pattern handed to emojy_replace().
emo_vec <- read_file("./emoji_regex_vector.txt")

# Replace every match of a regex pattern inside a character vector.
#
# Arguments:
#   string     - character vector to process.
#   replacment - text substituted for each match (spelling kept for callers).
#   emo_vec    - regex pattern, passed to stringr as `pattern`.
#
# Returns `string` with all matches of `emo_vec` replaced by `replacment`.
emojy_replace <- function(string, replacment, emo_vec) {
  cleaned <- stringr::str_replace_all(
    string = string,
    pattern = emo_vec,
    replacement = replacment
  )
  cleaned
}
################################################################################
# Relevance
################################################################################
# Load the completed RA annotations for the relevance task, keep only rows
# that both research assistants coded, and derive the agreement columns.
org <- read_xlsx(
  "../data/research_assistants_data/relevance_task/annotation_data_tweetscongress_ra_completed.xlsx"
) %>%
  filter(!is.na(relevant_paula), !is.na(relevant_fabio)) %>%
  mutate(
    # TRUE where the two coders gave the same relevance code.
    relevance_agreement_check = relevant_paula == relevant_fabio,
    # Agreed code where the coders match, NA where they disagree.
    relevant_ra = ifelse(relevance_agreement_check, relevant_fabio, NA)
  )

#RA Agreement
# Counts and shares of agreeing / disagreeing rows. print() is explicit so the
# table also appears when the script is run through source(), which does not
# auto-print top-level values.
print(
  org %>%
    count(relevance_agreement_check) %>%
    mutate(f = n / sum(n))
)

# RA copy: written with the tweet text exactly as read from the workbook.
write_csv(
  org,
  "../data/research_assistants_data/relevance_task/training_data_tweetscongress_relevance.csv"
)

# mTurk copy: same rows, with every match of the emo_vec pattern removed from
# the text column.
org <- org %>% mutate(text = emojy_replace(text, "", emo_vec = emo_vec))
write_csv(org, "../data/mturk_data/training_data_relevance.csv")
################################################################################
# Frames
################################################################################
# Load the completed RA annotations for the frames task and keep only tweets
# that both research assistants coded and agreed are relevant (code 1).
# NOTE(review): the column name `relvant_agreement_check` is misspelt, but it
# is written to the output CSVs, so it is left unchanged here.
org <- read_xlsx(
  "../data/research_assistants_data/frames_task/annotation_data_tweetscongress_ra_completed.xlsx"
) %>%
  filter(!is.na(relevant_paula), !is.na(relevant_fabio)) %>%
  mutate(
    relvant_agreement_check = relevant_paula == relevant_fabio,
    # Agreed relevance code, NA where the coders disagree.
    relevant = ifelse(relvant_agreement_check, relevant_fabio, NA)
  ) %>%
  # Rows with NA (disagreement) are dropped by filter() as well.
  filter(relevant == 1) %>%
  # TRUE where both coders chose the same frame; NA if either frame is NA.
  mutate(frames_agreement_check = frame_fabio == frame_paula)

#RA Agreement
# Counts and shares of agreeing / disagreeing rows, computed before
# de-duplication. print() is explicit so the table also appears when the
# script is run through source(), which does not auto-print top-level values.
print(
  org %>%
    count(frames_agreement_check) %>%
    mutate(f = n / sum(n))
)

org <- org %>%
  # Agreed frame where the coders match, NA otherwise.
  mutate(frames_ra = ifelse(frames_agreement_check, frame_fabio, NA)) %>%
  # Keep the first row for each status_id.
  distinct(status_id, .keep_all = TRUE) %>%
  # Remove every match of the emo_vec pattern from the text column.
  mutate(text = emojy_replace(text, "", emo_vec = emo_vec))

# NOTE(review): unlike the relevance section, the RA copy written here already
# has the emo_vec pattern stripped from the text - confirm this is intended.
write_csv(
  org,
  "../data/research_assistants_data/frames_task/training_data_tweetscongress_frames.csv"
)

# The text was already cleaned above, so the mTurk copy needs no second regex
# pass. Previously a re-cleaned `org_meysam` was built but `org` was written.
org_meysam <- org
write_csv(
  org_meysam,
  "../data/mturk_data/training_data_tweetscongress_frames_final.csv"
)
